library(conflicted)
library(tidyverse)
library(scales)
library(ggrepel)
library(gghighlight)
library(mapSpain)
library(ESdata)
conflicts_prefer(
dplyr::filter(),
dplyr::select(),
dplyr::lag(),
)Spain Occupational Accident Analysis (SODA)
Business Analytics Project
Introduction
This report covers the analysis of occupational accidents, also known as work accidents, in Spain
Data Exploration
Work Accident Data
load("data/ATR/ATR-I.1.3.RData")
atr |>
filter(sector == "Total",
ccaa == "ES") |>
ggplot(aes(date, accidents)) +
geom_line() +
geom_point()atr |>
filter(ccaa == "ES") |>
ggplot(aes(x = date, y = accidents, color = sector)) +
geom_line() +
geom_point(size=2) +
labs(x = "Year",
y = "Work accidents per 100k workers",
title = "Work accidents in Spain from 2009 to 2021")atr |>
filter(ccaa == "ES") |>
ggplot(aes(x = date, y = accidents, color = sector)) +
geom_line() +
geom_point() +
gghighlight(sector != "total", label_key = sector, use_group_by = FALSE) +
facet_wrap(~sector)atr |>
filter(sector == "Total",
year == 2021) |>
ggplot(aes(x = accidents, y = fct_reorder(ccaa_name, accidents))) +
geom_col() +
labs(x = "Work accidents per 100k workers",
y = NULL,
title = "Work accidents in 2021 per autonomous community")Add year over year change
atr <- atr |>
mutate(acc_yoy = (accidents - lag(accidents)) / lag(accidents),
.by = c(sector, ccaa))atr |>
filter(ccaa == "ES") |>
drop_na() |>
ggplot(aes(x = date, y = acc_yoy, color = sector)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
geom_line() +
geom_point() +
scale_y_continuous(labels = label_percent()) +
labs(x = "Year",
y = "Year over year change",
title = "Work accidents in Spain from 2009 to 2021")atr |>
filter(ccaa == "ES") |>
drop_na() |>
ggplot(aes(x = date, y = acc_yoy, fill = sector)) +
geom_col() +
facet_wrap(~sector) +
scale_y_continuous(labels = label_percent()) +
labs(x = "Year",
y = "Year over year change",
title = "Work accidents in Spain from 2009 to 2021") +
theme(legend.position = c(1, -0.5),
legend.justification = c(1, -0.5))Active working population
work_pop <- epa_sector |>
filter(edad == "total",
sexo == "total") |>
select(-edad, -sexo)
work_pop |>
filter(region == "ES") |>
ggplot(aes(x = periodo, y = valor, color = sector)) +
geom_line() +
geom_point() +
labs(x = "Year", y = "100k workers",
title = "Active working population in Spain")Working population per sector
# Add relative work pop per sector
work_pop <- work_pop |>
mutate(rel = valor / valor[sector == "total"],
.by = c(periodo, region))
work_pop |>
# Add full AACC names
left_join(ccaa_iso, by = join_by(region == iso)) |>
filter(periodo == "2023-01-31",
sector != "total",
!region %in% c("ES-ML", "ES-CE")) |>
ggplot(aes(rel, fct_reorder(nombres, rel), fill = sector)) +
geom_col() +
scale_x_continuous(labels = label_percent()) +
labs(x = "Percentage", y = NULL,
title = "Relative working population per sector")Non-EU-working population
work_noEU <- epa_nac |>
# Only view all sexes and working population
filter(sexo == "total",
dato == "act") |>
select(-sexo, -dato) |>
# Compute relative amount of workforce per nationality
mutate(rel = valor / valor[nac == "total"],
.by = c(region, periodo)) |>
# Only keep no-UE
filter(nac == "no-UE") |>
select(-valor, -nac) |>
rename(noEU_rel = rel)
work_noEU |> head()# A tibble: 6 × 3
periodo region noEU_rel
<date> <chr> <dbl>
1 2023-01-31 ES 0.103
2 2022-12-31 ES 0.100
3 2022-09-30 ES 0.0960
4 2022-06-30 ES 0.0974
5 2022-01-31 ES 0.0926
6 2021-12-31 ES 0.0936
Plot non-EU workforce per AACC:
work_noEU |>
filter(periodo == "2023-01-31") |>
# Add full AACC names
left_join(ccaa_iso, by = join_by(region == iso)) |>
ggplot(aes(x = noEU_rel, y = fct_reorder(nombres, noEU_rel))) +
geom_col() +
scale_x_continuous(labels = label_percent()) +
labs(x = "Percentage",
y = NULL,
title = "Non-EU working population in 2023 per autonomous community")# Get Spanish AACCs as simple features
ccaa <- esp_get_ccaa()
can_box <- esp_get_can_box()
atr_2021 <- atr |>
filter(year == 2021,
ccaa != "ES",
sector == "Total") |>
mutate(ccaa = as.character(ccaa))
# Join SF and work accidents data
ccaa_atr <- ccaa |>
inner_join(atr_2021, by = join_by(iso2.ccaa.code == ccaa))
ggplot(ccaa_atr) +
# Plot AACC
geom_sf(aes(fill = accidents),
color = "grey70",
linewidth = .3) +
# Plot canaries box
geom_sf(data = can_box, color = "grey70") +
# Plot labels with accident number
geom_label_repel(
aes(label = round(accidents), geometry = geometry),
stat = "sf_coordinates",
fill = alpha(c("white"), 0.5),
color = "black",
size = 3,
label.size = 0
) +
# Adjust color scale
scale_fill_distiller(palette = "Blues", direction = 1) +
theme_void() +
theme(legend.position = c(0.12, 0.6)) +
labs(x = NULL,
y = NULL,
title = "Work accidents in 2021 per autonomous community",
fill = "Accidents per\n100k workers")library(plotly)
library(sf)
ccaa <- esp_get_ccaa() |> st_cast("MULTIPOLYGON")
ccaa_atr <- ccaa |>
inner_join(atr_2021, by = join_by(iso2.ccaa.code == ccaa))
p <- ggplot(ccaa_atr) +
geom_sf(aes(fill = accidents,
text = paste0(ccaa_name, ":\n", round(accidents))),
color = "grey70",
linewidth = .3) +
geom_sf(data = can_box, color = "grey70") +
scale_fill_distiller(palette = "Blues", direction = 1) +
theme_void() +
labs(x = NULL,
y = NULL,
title = "Work accidents in 2021 per autonomous community",
fill = "Accidents per\n100k workers")
ggplotly(p, tooltip = "text") |> style(hoveron = "fill")